import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Perceptron
from sklearn import datasets
from sklearn.model_selection import train_test_split
# Load the scikit-learn wine dataset and keep only feature columns 0, 1 and 11
# together with the class labels.
podaci_vino = datasets.load_wine()
podaci, oznake = podaci_vino.data[:,[0,1,11]], podaci_vino.target
# Hold out 20% of the samples for testing. The split is stratified on the
# labels and uses a fixed random_state so it is repeatable.
podaci_tren, podaci_test, oznake_tren, oznake_test = train_test_split(podaci, oznake, test_size=0.2,
stratify=oznake, random_state=1)
from sklearn.preprocessing import StandardScaler
# The scaler is fitted on the training split only; the test split is
# transformed with the statistics learned from the training data.
std = StandardScaler()
podaci_tren_std = std.fit_transform(podaci_tren)
podaci_test_std = std.transform(podaci_test)
# Colour list for the class scatter plots.
boje = ['yellow', 'blue', 'red']
def prikaz3d(podaci, oznake, labele, boje=('yellow', 'blue', 'red')):
    """Draw a 3D scatter plot of the samples, one colour per class.

    Args:
        podaci: 2-D array; columns 0, 1 and 2 are used as x, y and z.
        oznake: 1-D array of class labels aligned with the rows of ``podaci``.
        labele: Sequence of three axis labels (x, y, z).
        boje: Colours paired, in order, with the sorted unique labels.
            The default replaces a lookup of the module-level name ``boje``,
            which other code in this file rebinds to an array of predictions.

    Classes beyond ``len(boje)`` are not drawn, because ``zip`` stops at the
    shorter of its inputs.
    """
    # Importing mplot3d registers the '3d' projection on older Matplotlib
    # releases; the imported name itself is not needed.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    fig = plt.figure(figsize=(10, 10))
    # Axes3D(fig) is not attached to the figure automatically on current
    # Matplotlib releases, which leaves the figure empty; add_subplot is the
    # supported way to create 3D axes.
    ax = fig.add_subplot(projection='3d')
    for oznaka, boja in zip(np.unique(oznake), boje):
        maska = oznake == oznaka
        ax.scatter(podaci[maska, 0], podaci[maska, 1], podaci[maska, 2],
                   c=boja, label=oznaka, marker='o', s=100)
    ax.set_xlabel(labele[0])
    ax.set_ylabel(labele[1])
    ax.set_zlabel(labele[2])
    ax.legend(loc='best', prop={'size': 12})
    ax.view_init(5, 75)
    plt.show()
# Show the standardised training samples in 3D, coloured by class.
prikaz3d(podaci_tren_std, oznake_tren, ('alcohol', 'malic_acid', 'od280/od315_of_diluted_wines'))
# Perceptron with max_iter=8 and learning rate eta0=0.1, fitted on the
# standardised training split.
perceptron = Perceptron(max_iter=8, eta0=0.1)
perceptron.fit(podaci_tren_std, oznake_tren)
# Test-set predictions; not referenced again in the visible part of this file.
podaci_pred = perceptron.predict(podaci_test_std)
# Accuracy on the held-out split, rounded to three decimals.
print(f'Accuracy: {np.round(perceptron.score(podaci_test_std, oznake_test), 3)}')
def izracun_granica(podaci, klasifikator, rez):
    """Classify every point of a regular 3D grid that spans the data.

    Args:
        podaci: 2-D array; columns 0, 1 and 2 define the grid extent.
        klasifikator: Object with a ``predict`` method accepting an
            ``(n_points, 3)`` array.
        rez: Grid step used along all three axes.

    Returns:
        Tuple ``(xx1, xx2, xx3, predictions)``: the three meshgrid coordinate
        arrays and the predicted labels reshaped to the grid shape.
    """
    # One axis per feature column, padded by 1 below the minimum and above
    # the maximum of that column.
    axes = []
    for column in range(3):
        values = podaci[:, column]
        axes.append(np.arange(values.min() - 1, values.max() + 1, rez))

    xx1, xx2, xx3 = np.meshgrid(*axes)

    # Flatten the grid into one row per point for the classifier, then fold
    # the predictions back into the grid shape.
    grid_points = np.array([xx1.ravel(), xx2.ravel(), xx3.ravel()]).T
    predictions = klasifikator.predict(grid_points).reshape(xx1.shape)
    return xx1, xx2, xx3, predictions
# Classify a 3D grid (step 0.1) spanning the standardised training data with
# the fitted perceptron.
# NOTE: this rebinds the module-level name `boje` from the colour list to the
# array of grid predictions.
xx1, xx2, xx3, boje = izracun_granica(podaci_tren_std, perceptron, rez=0.1)
from mpl_toolkits.mplot3d import Axes3D
def prikaz_granica3d(xx1, xx2, xx3, boje, b, labele):
    """Plot the decision regions of a classifier as a coloured 3D point cloud.

    Args:
        xx1, xx2, xx3: Grid coordinate arrays of identical shape.
        boje: Predicted class label for every grid point (same shape).
        b: Mapping from class label to Matplotlib colour.
        labele: Sequence of three axis labels (x, y, z).
    """
    fig = plt.figure(figsize=(10, 10))
    # Axes3D(fig) is not attached to the figure automatically on current
    # Matplotlib releases; add_subplot is the supported way to get 3D axes.
    ax = fig.add_subplot(projection='3d')
    # Iterate over the labels that actually occur. Counting them with
    # range(len(...)) would pick the wrong labels whenever a class is missing
    # from the predictions (e.g. only 0 and 2 are predicted).
    for klasa in np.unique(boje).tolist():
        maska = boje == klasa
        ax.scatter(xx1[maska], xx2[maska], xx3[maska], c=b[klasa], marker='o', s=3,
                   label=klasa, alpha=0.1)
    ax.set_xlabel(labele[0])
    ax.set_ylabel(labele[1])
    ax.set_zlabel(labele[2])
    # `fontsize` was dropped: it was passed together with `prop`, which
    # already fixes the legend font size.
    ax.legend(loc="upper right", scatterpoints=7, markerscale=10.0, prop={'size': 16})
    ax.view_init(25, 60)
    plt.show()
# Class label -> colour mapping and axis labels for the decision-region plot.
b = {0:'yellow', 1:'blue', 2:'red'}
labele = ('alcohol', 'malic_acid', 'od280/od315_of_diluted_wines')
prikaz_granica3d(xx1, xx2, xx3, boje, b, labele)
# Table of the fitted perceptron parameters: the intercepts form column w0 and
# the coefficient matrix forms columns w1..w3. reshape(3,1) hard-codes three
# intercepts.
tezine = pd.DataFrame(np.hstack((perceptron.intercept_.reshape(3,1), perceptron.coef_)), columns=[f'w{i}' for i in range(4)])
# Bare expression: the table is displayed only in an interactive/notebook session.
tezine
from sklearn.linear_model import SGDClassifier
# SGD classifier with the perceptron loss, trained incrementally in two batches.
# NOTE(review): per the scikit-learn docs, eta0 is only used by the non-default
# learning-rate schedules and max_iter only by fit(), so both may have no
# effect on the partial_fit calls below — confirm.
sgdc = SGDClassifier(loss='perceptron', eta0=0.1, max_iter=8)
# First batch: the first 71 training samples. The full set of class labels is
# passed explicitly on this first call.
sgdc.partial_fit(podaci_tren_std[:71,:], oznake_tren[:71], classes=(0,1,2))
print(f'Accuracy: {np.round(sgdc.score(podaci_test_std, oznake_test), 3)}')
# Second batch: the remaining training samples, followed by a re-evaluation on
# the same test split.
sgdc.partial_fit(podaci_tren_std[71:,:], oznake_tren[71:])
print(f'Accuracy: {np.round(sgdc.score(podaci_test_std, oznake_test), 3)}')
from matplotlib.colors import ListedColormap
def prikaz2d(podaci, oznake, klasifikator, rez=0.01):
    """Plot 2D decision regions of a classifier together with the samples.

    Args:
        podaci: 2-D array; columns 0 and 1 are used as the x and y axes.
        oznake: 1-D array of class labels aligned with the rows of ``podaci``.
        klasifikator: Fitted object whose ``predict`` accepts ``(n, 2)`` arrays.
        rez: Step of the prediction grid along both axes.
    """
    markeri = ['s', '^', 'o']
    boje = ['red', 'blue', 'yellow']
    klase = np.unique(oznake)
    cmap = ListedColormap(boje[:len(klase)])

    # Prediction grid over the first two columns, padded by 1 on every side.
    os_x = np.arange(podaci[:, 0].min() - 1, podaci[:, 0].max() + 1, rez)
    os_y = np.arange(podaci[:, 1].min() - 1, podaci[:, 1].max() + 1, rez)
    xx1, xx2 = np.meshgrid(os_x, os_y)
    tocke = np.array([xx1.ravel(), xx2.ravel()]).T
    xx3 = klasifikator.predict(tocke).reshape(xx1.shape)

    # Filled contours show which class is predicted in each region.
    plt.figure(figsize=(10, 8))
    plt.contourf(xx1, xx2, xx3, alpha=0.2, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Overlay the samples, one marker/colour combination per class.
    for idx, cl in enumerate(klase):
        odabrani = podaci[oznake == cl]
        plt.scatter(x=odabrani[:, 0], y=odabrani[:, 1], alpha=0.8, c=boje[idx],
                    marker=markeri[idx], label=cl, edgecolor='black')

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.legend(loc='best')
    plt.show()
# Synthetic two-class problem with two informative features (300 samples).
# No random_state is passed to make_classification, so the generated data
# differ between runs.
podaci, oznake = datasets.make_classification(n_samples=300, n_features=2, n_informative=2, n_redundant=0, flip_y=0.11,
n_classes=2, class_sep=0.9)
# Stratified 70/30 split with a fixed random_state.
podaci_tren, podaci_test, oznake_tren, oznake_test = train_test_split(podaci, oznake, test_size=0.3,
stratify=oznake, random_state=1)
from sklearn.linear_model import LogisticRegression
# L2-penalised logistic regression with C=0.1, limited to max_iter=1.
lr = LogisticRegression(max_iter=1, penalty='l2', C=0.1)
lr.fit(podaci_tren, oznake_tren)
# Bare expression: the test accuracy is shown only in an interactive session.
lr.score(podaci_test, oznake_test)
# Decision regions drawn over the training split, then over the test split.
prikaz2d(podaci_tren, oznake_tren, lr), prikaz2d(podaci_test, oznake_test, lr);
# Predicted class probabilities for the test samples, one column per class.
df = pd.DataFrame(lr.predict_proba(podaci_test), columns=('klasa 0', 'klasa 1'))
df.head(5)
# Hard class predictions for the same samples; the first five are printed.
sd = lr.predict(podaci_test)
print(sd[:5])
# Synthetic three-class problem with three informative features and two
# clusters per class (1000 samples). No random_state is passed, so the data
# differ between runs.
podaci, oznake = datasets.make_classification(n_samples=1000, n_features=3, n_informative=3, n_redundant=0, flip_y=0.01,
n_classes=3, class_sep=0.95, n_clusters_per_class=2)
podaci_tren, podaci_test, oznake_tren, oznake_test = train_test_split(podaci, oznake, test_size=0.2,
stratify=oznake, random_state=1)
# Restore the colour list: the name `boje` was rebound to an array of grid
# predictions by an earlier izracun_granica call.
boje = ['yellow', 'blue', 'red']
prikaz3d(podaci_tren, oznake_tren, ('X1', 'X2', 'X3'))
from sklearn.svm import SVC
svc = SVC()
from sklearn.model_selection import GridSearchCV
# Hyper-parameter grid: C alone for the linear kernel, C x gamma for the RBF kernel.
raspon = [0.01, 0.1, 1.0, 10.0, 100.0]
mogucnosti = [{'C': raspon, 'kernel': ['linear']},
{'C': raspon, 'gamma': raspon, 'kernel': ['rbf']}]
# Grid search with cv=10, scored by accuracy, with n_jobs=-1.
gs = GridSearchCV(estimator=svc, param_grid=mogucnosti, scoring='accuracy',
cv=10, n_jobs=-1, verbose=True, return_train_score=True)
gs.fit(podaci_tren, oznake_tren)
# Bare expressions: the test score and the best parameters are shown only in
# an interactive session.
gs.score(podaci_test, oznake_test)
gs.best_params_
# Continue with the best estimator found by the search.
svc = gs.best_estimator_
b = {0:'yellow', 1:'blue', 2:'red'}
labele = ('X1', 'X2', 'X3')
# Grid predictions of the tuned SVC over a grid spanning the training data;
# this rebinds `boje` to the prediction array again.
xx1, xx2, xx3, boje = izracun_granica(podaci_tren, svc, rez=0.1)
def prikaz3d_svi(xx1, xx2, xx3, boje, b, labele, podaci_tren, oznake_tren, podaci_test, oznake_test):
    """Plot decision regions together with training and test samples in 3D.

    Args:
        xx1, xx2, xx3: Grid coordinate arrays of identical shape.
        boje: Predicted class label for every grid point (same shape).
        b: Mapping from class label to Matplotlib colour.
        labele: Sequence of three axis labels (x, y, z).
        podaci_tren: 2-D array of training samples (columns 0..2 are plotted).
        oznake_tren: Class labels of the training samples.
        podaci_test: 2-D array of test samples (columns 0..2 are plotted).
        oznake_test: Class labels of the test samples.
    """
    fig = plt.figure(figsize=(15, 15))
    # Axes3D(fig) is not attached to the figure automatically on current
    # Matplotlib releases; add_subplot is the supported way to get 3D axes.
    ax = fig.add_subplot(projection='3d')

    # Use the labels that actually occur in the predictions or the samples.
    # Counting predicted labels with range(len(...)) picks the wrong labels,
    # and drops the samples of a class, whenever a class is never predicted.
    klase = np.union1d(np.unique(boje), np.union1d(oznake_tren, oznake_test))
    for klasa in klase.tolist():
        boja = b[klasa]

        # Each layer is skipped when it has no points for this class.
        mreza = boje == klasa
        if mreza.any():
            ax.scatter(xx1[mreza], xx2[mreza], xx3[mreza], c=boja, marker='o', s=1,
                       label=klasa, alpha=0.15)

        tren = oznake_tren == klasa
        if tren.any():
            ax.scatter(podaci_tren[tren, 0], podaci_tren[tren, 1], podaci_tren[tren, 2],
                       c=boja, marker='*', edgecolors='black', s=30, label='trening uzorci')

        test = oznake_test == klasa
        if test.any():
            ax.scatter(podaci_test[test, 0], podaci_test[test, 1], podaci_test[test, 2],
                       c=boja, marker='s', edgecolors='black', s=30, label='testni uzorci')

    ax.set_xlabel(labele[0])
    ax.set_ylabel(labele[1])
    ax.set_zlabel(labele[2])
    # `fontsize` was dropped: it was passed together with `prop`, which
    # already fixes the legend font size.
    ax.legend(loc="upper right", markerscale=3.0, prop={'size': 16})
    ax.view_init(20, 75)
    plt.show()
# Combined view: SVC decision regions with the training and test samples.
prikaz3d_svi(xx1, xx2, xx3, boje, b, labele, podaci_tren, oznake_tren, podaci_test, oznake_test)

# One separate figure per predicted class. Iterating over the labels that
# occur in the predictions avoids assuming they are exactly 0..k-1.
for i in np.unique(boje).tolist():
    fig = plt.figure(figsize=(10, 10))
    # Axes3D(fig) is not attached to the figure automatically on current
    # Matplotlib releases; add_subplot is the supported way to get 3D axes.
    ax = fig.add_subplot(projection='3d')
    ax.scatter(xx1[boje == i], xx2[boje == i], xx3[boje == i], c=b[i], marker='o', s=1,
               label=i, alpha=0.15)
    ax.scatter(podaci_tren[oznake_tren == i, 0], podaci_tren[oznake_tren == i, 1],
               podaci_tren[oznake_tren == i, 2],
               c=b[i], marker='*', edgecolors='black', s=30, label='trening uzorci')
    ax.scatter(podaci_test[oznake_test == i, 0], podaci_test[oznake_test == i, 1],
               podaci_test[oznake_test == i, 2],
               c=b[i], marker='s', edgecolors='black', s=30, label='testni uzorci')
    ax.set_xlabel(labele[0])
    ax.set_ylabel(labele[1])
    ax.set_zlabel(labele[2])
    # `fontsize` was dropped: it was passed together with `prop`, which
    # already fixes the legend font size.
    ax.legend(loc="upper right", markerscale=3.0, prop={'size': 16})
    ax.view_init(20, 80)
    plt.show()
from sklearn.tree import DecisionTreeClassifier
# Reload the wine dataset, this time keeping every feature column.
podaci_vino = datasets.load_wine()
podaci, oznake = podaci_vino.data, podaci_vino.target
podaci_tren, podaci_test, oznake_tren, oznake_test = train_test_split(podaci, oznake, test_size=0.2,
stratify=oznake, random_state=1)
# Table of all features with the class label appended as the last column 'klasa'.
df = pd.DataFrame(np.hstack((podaci,oznake.reshape(oznake.shape[0], 1))), columns=podaci_vino.feature_names + ['klasa'])
df.head(5)
# Decision tree with the entropy criterion, limited to depth 4.
dtc = DecisionTreeClassifier(criterion='entropy', max_depth=4)
dtc.fit(podaci_tren, oznake_tren)
# Bare expressions: the score and probabilities are shown only in an
# interactive session.
dtc.score(podaci_test, oznake_test)
dtc.predict_proba(podaci_test)[:5]
print(dtc.predict(podaci_test)[:5])
from pydotplus import graph_from_dot_data
from sklearn.tree import export_graphviz
# Export the fitted tree as DOT source (out_file=None returns it as a string)
# and render it to 'tree.png' via pydotplus.
# NOTE(review): rendering presumably requires a Graphviz installation — confirm.
dot_data = export_graphviz(dtc, filled=True, rounded=True, class_names=list('012'),
feature_names=podaci_vino.feature_names, out_file=None)
graph = graph_from_dot_data(dot_data)
graph.write_png('tree.png');
from IPython.display import Image
# Bare expression: the image is displayed only in an IPython/notebook session.
Image(filename='tree.png')
# Back to the three-feature wine subset (columns 0, 1 and 11), this time
# without standardisation.
podaci_vino = datasets.load_wine()
podaci, oznake = podaci_vino.data[:,[0,1,11]], podaci_vino.target
podaci_tren, podaci_test, oznake_tren, oznake_test = train_test_split(podaci, oznake, test_size=0.2,
stratify=oznake, random_state=1)
# Shallow tree (depth 2) with the default split criterion.
dtc = DecisionTreeClassifier(max_depth=2)
dtc.fit(podaci_tren, oznake_tren)
# Grid predictions of the tree over a grid spanning the training data; this
# rebinds `boje` to the prediction array.
xx1, xx2, xx3, boje = izracun_granica(podaci_tren, dtc, rez=0.1)
b = {0:'yellow', 1:'blue', 2:'red'}
labele = ('alcohol', 'malic_acid', 'od280/od315_of_diluted_wines')
prikaz_granica3d(xx1, xx2, xx3, boje, b, labele)
from sklearn.ensemble import RandomForestClassifier
# Random forest with the entropy criterion; the number of trees is tuned below.
rfc = RandomForestClassifier(criterion='entropy')
# 100 candidate forest sizes: 1, 11, 21, ..., 991.
mogucnosti = [{'n_estimators': range(1, 1001, 10)}]
# With cv=10 this is 100 x 10 = 1000 cross-validation fits, so the search is slow.
gs = GridSearchCV(estimator=rfc, param_grid=mogucnosti, scoring='accuracy',
cv=10, n_jobs=-1, verbose=True, return_train_score=True)
gs.fit(podaci_tren, oznake_tren)
# Bare expressions: the test score and the best parameters are shown only in
# an interactive session.
gs.score(podaci_test, oznake_test)
gs.best_params_
# Continue with the best forest found by the search.
rfc = gs.best_estimator_
# Grid predictions of the forest over a grid spanning the training data; this
# rebinds `boje` to the prediction array.
xx1, xx2, xx3, boje = izracun_granica(podaci_tren, rfc, rez=0.1)
b = {0:'yellow', 1:'blue', 2:'red'}
labele = ('alcohol', 'malic_acid', 'od280/od315_of_diluted_wines')
# Decision regions alone, then decision regions with training and test samples.
prikaz_granica3d(xx1, xx2, xx3, boje, b, labele)
prikaz3d_svi(xx1, xx2, xx3, boje, b, labele, podaci_tren, oznake_tren, podaci_test, oznake_test)